import pandas as pd
import numpy as np
import json
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's default theme to every plot produced below.
sns.set_theme()

# Load the records: 'example.txt' holds one JSON object per line.
# The context manager closes the file as soon as parsing finishes instead of
# leaving the handle open until garbage collection.
with open('example.txt', 'r', encoding='utf8') as fh:
    # Blank lines (e.g. a trailing newline at the end of the file) are not
    # valid JSON documents, so skip them rather than letting json.loads raise.
    frame = pd.DataFrame([json.loads(line) for line in fh if line.strip()])
print(frame.head())
# Handle missing values: an absent time zone becomes 'Missing' and an
# empty-string time zone becomes 'Unknown'.
clean_tz = frame['tz'].fillna('Missing')
clean_tz = clean_tz.mask(clean_tz == '', 'Unknown')
# Users per region: bar chart of the ten most frequent time zones.
subset = clean_tz.value_counts().head(10)
sns.barplot(x=subset.index, y=subset.values)
plt.show()
# Users per browser: the browser token is taken to be the first
# whitespace-separated word of the user-agent string stored in column 'a'.
# An empty or whitespace-only agent splits into no tokens at all, so such
# rows are skipped instead of raising IndexError on `[0]`.
agent_tokens = (agent.split() for agent in frame['a'].dropna())
b = pd.Series([tokens[0] for tokens in agent_tokens if tokens])
print(b.head())
# Bar chart of the five most frequent browser tokens.
subset = b.value_counts().head(5)
sns.barplot(x=subset.index, y=subset.values)
plt.show()

# Users per operating system.
# Keep only the rows that have a user agent. The explicit copy makes `o` an
# independent frame, so adding the 'os' column below does not write into a
# slice of `frame` (which would trigger pandas' SettingWithCopyWarning).
o = frame[frame['a'].notnull()].copy()
# Label each row by whether its user-agent string contains 'Windows'.
o['os'] = np.where(o['a'].str.contains('Windows'), 'Windows', 'Not Windows')
print(o['os'].head())
# Count once and reuse the result for both plot axes.
os_counts = o['os'].value_counts()
sns.barplot(x=os_counts.index, y=os_counts.values)
plt.show()
# Region and operating system: number of records per (tz, os) pair.
to = o.groupby(['tz', 'os'])['a'].count()
# Plot only the groups at positions 2 through 9 of the grouped result;
# unstacking moves the 'os' level into columns so each time zone gets one
# bar per OS label.
# NOTE(review): presumably the first two groups are skipped on purpose
# (e.g. an empty-string tz) — confirm.
selected = to.iloc[2:10]
selected.unstack().plot(kind='bar')
plt.show()